#################################################################
#################################################################
######## SPATIAL ANALYSIS OF GEOCODED AND MERGED DATA ###############
#################################################################
#################################################################

pacman::p_load(stringdist, data.table, dplyr, sf, cowplot, viridis, ggplot2, haven, osmdata, qs)

## Preliminaries
rm(list = ls())

source("code/globals.R")

#------------------ Load working dataset from geocode script -----------------#
zillowdata <- qread(file.path(WORKING, "zillowdata.qs"))


###############################################################################################
###### STEP 2: Merge fire perimeters to Damage Assessment Data for Homes ############################
###############################################################################################

# Load list of fire ids
fire_ids <- readRDS(file.path(WORKING, "fire_ids.RDS")) %>%
  dplyr::rename(UNIT_AND_NUMBER = incidentid)

################ Make a point dataset of all destroyed homes.
h <- zillowdata %>%
  dplyr::filter(!is.na(lon) & !is.na(lat)) %>%
  dplyr::filter(destroyed == TRUE) %>%
  st_as_sf(
    coords = c("lon", "lat"),
    # agr = "constant",
    crs = "+proj=longlat +datum=WGS84",
    stringsAsFactors = FALSE,
    remove = TRUE
  ) %>%
  st_transform(crs = 3310) %>% # California Albers
  merge(fire_ids[, c("UNIT_AND_NUMBER", "incidentname", "StartDate")], by = "UNIT_AND_NUMBER", all.x = TRUE) %>%
  dplyr::rename(dinsname = incidentname, dinsdate = StartDate) %>%
  dplyr::mutate(dinsyear = year(dinsdate))

#################### Define the function to do the spatial merging to identify candidate perimeters
## The 'perims_id_var' argument is necessary because the variable names in the various perimeter datasets are different.

findperimeters <- function(destroyedhomes_incid, perimfile, savefolder) {
  dir.create(file.path(RES, "incident_maps", "perimeter_identification", savefolder), showWarnings = FALSE)
  dpoints <- h %>%
    dplyr::filter(UNIT_AND_NUMBER == destroyedhomes_incid) %>%
    dplyr::select(UNIT_AND_NUMBER, dinsname, dinsdate, dinsyear)
  candidates <- st_join(perimfile, dpoints, left = FALSE) %>%
    dplyr::filter(yearperim >= dinsyear - 1 & yearperim <= dinsyear + 1)
  possibleperims <- candidates %>%
    dplyr::filter(!duplicated(perimidvar))
  ## Plot individual components
  for (i in seq_len(nrow(possibleperims))) {
    p <- ggplot() +
      geom_sf(data = possibleperims[i, ], aes(color = as.factor(perimidvar)), fill = NA) +
      geom_sf(data = dpoints) +
      theme_map()
    save_plot(file.path(RES, "incident_maps", "perimeter_identification", savefolder, paste0(destroyedhomes_incid, "_", i, ".png")), plot = p)
  }
  return(possibleperims)
}

# __________________________ Part 2A: California fires.
#### For these I use FRAP for all except the 2020 fires, which use NIFS perimeters.


################ List of California fires
ca_fire_ids <- fire_ids %>%
  dplyr::filter(State == "California")
incidentids_ca <- ca_fire_ids$UNIT_AND_NUMBER

########## Part 1: FRAP perimeters (available through 2020) ################

## confirm understand structure of IDs in FRAP --
## Unit ID and Inc Num uniquely identify fires in 2017 and later
## However - if we eventually bring in earlier years, note that 2016 includes a duplicated id.
## Update: The one duplicated fire in 2016 is a 12.5 acre small fire that does not appear in DINS:
## To see this, look for CASHF 001890 in DINS and see it's not there.

# Load FRAP fire perimeters data and pre-process
st_layers(file.path(INPUT_PUBLIC, "FirePerimeters", "frap_perim", "fire20_1", "fire20_1.gdb"))
perims_raw <- st_read(file.path(INPUT_PUBLIC, "FirePerimeters", "frap_perim", "fire20_1", "fire20_1.gdb"), layer = "firep20_1") %>%
  st_transform(crs = st_crs(h)) %>%
  st_cast("MULTIPOLYGON") %>%
  dplyr::mutate(perimidvar = 1:nrow(.)) %>% # Create a temporary unique ID
  dplyr::mutate(yearperim = YEAR_) %>%
  dplyr::select(perimidvar, yearperim, AGENCY, UNIT_ID, FIRE_NAME, INC_NUM, ALARM_DATE, COMMENTS, GIS_ACRES, FIRE_NUM)
stopifnot(anyDuplicated(perims_raw$perimidvar) == 0) # confirm the unique id in the nifc shapefile

# Apply my findperimeters function to identify candidate perims for each incident based on spatial and temporal overlap
biglist <- lapply(incidentids_ca, findperimeters,
  perimfile = perims_raw,
  savefolder = "frap"
)
perims <- do.call(rbind, biglist)

## Review spatial matches by hand to confirm quality
# compare reported start dates across the datasets
check_ca <- perims %>%
  st_drop_geometry() %>%
  dplyr::select(perimidvar, yearperim, UNIT_ID, FIRE_NAME, INC_NUM, COMMENTS, FIRE_NUM, GIS_ACRES, ALARM_DATE, UNIT_AND_NUMBER) %>%
  merge(ca_fire_ids[, c("UNIT_AND_NUMBER", "StartDate", "incidentname")],
    by = "UNIT_AND_NUMBER", all.x = TRUE
  ) %>%
  dplyr::select(yearperim, perimidvar, COMMENTS, UNIT_AND_NUMBER, FIRE_NAME, incidentname, ALARM_DATE, StartDate) %>%
  mutate(suspect = abs(as.Date(StartDate) - as.Date(ALARM_DATE)) > 5) %>%
  arrange(StartDate, UNIT_AND_NUMBER)
rm(check_ca)

## Based on this review, correct 3 erroneous matches:
updatebadmatches <- function(df, dinsincid, frapfirename) {
  stopifnot(nrow(perims[perims$UNIT_AND_NUMBER == dinsincid &
    perims$FIRE_NAME == frapfirename, ]) == 1) # Make sure this is dropping exactly 1 row (no less, no more; protects against changes of id vars etc.)
  newdf <- df %>%
    dplyr::filter(!(UNIT_AND_NUMBER == dinsincid & FIRE_NAME == frapfirename))
  return(newdf)
}
perims <- perims %>%
  updatebadmatches("CALNU 007582", "KUGELMAN") %>%
  updatebadmatches("CANEU 026269", "DOUGLAS") %>%
  updatebadmatches("CALNU 013407", "QUAIL") %>%
  dplyr::select(UNIT_AND_NUMBER, FIRE_NAME, yearperim) %>%
  rename(FireNamePerim = FIRE_NAME) %>%
  mutate(datasource = "FRAP") %>%
  st_cast("MULTIPOLYGON") # Avoid Unknown WKB type 12 issue later)

## Additional hand review of un-merged DINS fires identifies a few more matches to add
investigate_match <- function(dinsincid, frapunitid, frapincnum) {
  ggplot() +
    geom_sf(data = perims_raw[perims_raw$UNIT_ID == frapunitid & perims_raw$INC_NUM == frapincnum, ], fill = NA, color = "black") +
    geom_sf(data = h[h$UNIT_AND_NUMBER == dinsincid, ], color = "red")
}
investigate_match("CASLU 008948", "SLU", "008948") # CHIMNEY
investigate_match("CAORC 105068", "ORC", "17105068") # CANYON
investigate_match("CASHU 006565", "SHU", "00006589") # CREEK
investigate_match("CAVNC 090993", "VNC", "00090993") # HILL
investigate_match("CASLU 009669", "SLU", "00009669") # BRANCH
investigate_match("CASLU 009866", "SLU", "00009866") # POND

addedmatches <- perims_raw %>%
  mutate(UNIT_AND_NUMBER = "0") %>%
  mutate(UNIT_AND_NUMBER = if_else(condition = UNIT_ID == "SLU" & INC_NUM == "008948", true = "CASLU 008948", false = UNIT_AND_NUMBER)) %>%
  mutate(UNIT_AND_NUMBER = if_else(condition = UNIT_ID == "ORC" & INC_NUM == "17105068", true = "CAORC 105068", false = UNIT_AND_NUMBER)) %>%
  mutate(UNIT_AND_NUMBER = if_else(condition = UNIT_ID == "SHU" & INC_NUM == "00006589", true = "CASHU 006565", false = UNIT_AND_NUMBER)) %>%
  mutate(UNIT_AND_NUMBER = if_else(condition = UNIT_ID == "VNC" & INC_NUM == "00090993", true = "CAVNC 090993", false = UNIT_AND_NUMBER)) %>%
  mutate(UNIT_AND_NUMBER = if_else(condition = UNIT_ID == "SLU" & INC_NUM == "00009669", true = "CASLU 009669", false = UNIT_AND_NUMBER)) %>%
  mutate(UNIT_AND_NUMBER = if_else(condition = UNIT_ID == "SLU" & INC_NUM == "00009866", true = "CASLU 009866", false = UNIT_AND_NUMBER)) %>%
  dplyr::filter(UNIT_AND_NUMBER != "0") %>%
  rename(FireNamePerim = FIRE_NAME) %>%
  dplyr::select(UNIT_AND_NUMBER, FireNamePerim, yearperim) %>%
  mutate(datasource = "FRAP") %>%
  st_cast("MULTIPOLYGON")

stopifnot(nrow(addedmatches) == 6)

perims <- perims %>%
  rbind(addedmatches)

## Which fires still don't merge? Most did not destroy any homes.
setdiff(ca_fire_ids$UNIT_AND_NUMBER, perims$UNIT_AND_NUMBER)
failed <- which(ca_fire_ids$UNIT_AND_NUMBER %in% setdiff(ca_fire_ids$UNIT_AND_NUMBER, perims$UNIT_AND_NUMBER))
ca_fire_ids[failed, ][order(StartDate)]

rm(perims_raw)

## -- Map all California perimeters
ca_boundary <- st_read(file.path(INPUT_PUBLIC, "CensusTiger", "tl_2019_us_county", "tl_2019_us_county.shp")) %>%
  dplyr::filter(STATEFP == "06") %>%
  st_transform(crs = st_crs(perims))

p <- ggplot(data = perims) +
  geom_sf(aes(color = yearperim), alpha = 0.75, fill = NA) +
  geom_sf(data = ca_boundary, color = "gray", fill = NA) +
  scale_color_discrete() +
  theme_map()
save_plot(file.path(RES, "CA_perims.png"), plot = p)
rm(p)

# __________________________ Part 2B: Non-California fires.
#### For these I use spatial merges to identify candidate perimeters as polygons that contains destroyed homes.

################ List of non-California fires
otherstate_fire_ids <- fire_ids %>%
  dplyr::filter(State != "California")
incidentids_notca <- otherstate_fire_ids$UNIT_AND_NUMBER

########### First option where possible is to use the MTBS perimeters ############

perims_mtbs <- st_read(file.path(INPUT_PUBLIC, "FirePerimeters", "MTBS", "mtbs_perimeter_data", "mtbs_perims_DD.shp")) %>%
  st_cast("MULTIPOLYGON") %>%
  st_transform(crs = st_crs(h)) %>%
  dplyr::mutate(
    perimidvar = Event_ID, yearperim = year(Ig_Date),
    datasource = "MTBS", FireNamePerim = Incid_Name
  ) %>%
  dplyr::select(perimidvar, yearperim, Ig_Date, Incid_Name, Comment, datasource, FireNamePerim)

biglist <- lapply(incidentids_notca,
  findperimeters,
  perimfile = perims_mtbs,
  savefolder = "mtbs"
)
mtbs_merged <- do.call(rbind, biglist)
rm(biglist)

######### If not in MTBS, can use NIFS perimeters for 2000--2018 fire ################
## -Read the NIFC data and convert CRS
perims_nifc_old <- st_read(file.path(
  INPUT_PUBLIC, "FirePerimeters", "NIFC_current_perims", "Historic_GeoMAC_Perimeters_Combined_2000-2018-shp",
  "US_HIST_FIRE_PERIMTRS_2000_2018_DD83.shp"
)) %>%
  st_make_valid() %>%
  st_transform(crs = st_crs(h)) %>%
  dplyr::mutate(
    perimidvar = FID, yearperim = fireyear,
    datasource = "NIFC_old", FireNamePerim = paste(incidentna, complexnam, sep = "_")
  ) %>%
  dplyr::select(perimidvar, yearperim, incidentna, complexnam, datecurren, comments, datasource, FireNamePerim)

stopifnot(anyDuplicated(perims_nifc_old$FID) == 0) # confirm the unique id in the nifc shapefile

biglist <- lapply(incidentids_notca, findperimeters,
  perimfile = perims_nifc_old,
  savefolder = "nifc_old"
)
nifc_old_merged <- do.call(rbind, biglist)
rm(biglist)

######### For 2019 and 2020 fires, use "current" NIFS perimeters 2000--2018 fire ################
## -Read the NIFC data and convert CRS
perims_nifc_current <- st_read(file.path(INPUT_PUBLIC, "FirePerimeters", "NIFC_current_perims", "Current_Wildfire_Perimeters", "Public_NIFS_Perimeters.shp")) %>%
  st_make_valid() %>%
  st_transform(crs = st_crs(h)) %>%
  dplyr::mutate(
    perimidvar = OBJECTID, yearperim = year(CreateDate),
    datasource = "NIFC_current", FireNamePerim = paste(IncidentNa, ComplexNam, sep = "_")
  ) %>%
  dplyr::select(perimidvar, yearperim, IncidentNa, ComplexNam, CreateDate, Comments, datasource, FireNamePerim)

stopifnot(anyDuplicated(perims_nifc_old$OBJECTID) == 0) # confirm the unique id in the nifc shapefile

biglist <- lapply(incidentids_notca, findperimeters,
  perimfile = perims_nifc_current,
  savefolder = "nifc_current"
)
nifc_current_merged <- do.call(rbind, biglist)

# For Cameron Peak Fire and Highpark Fire, we also have perimeters supplied by the county
perims_cpf <- st_read(file.path(INPUT_PUBLIC, "DamageData", "NotDINS", "notCA", "Colorado_Larimer", "Boomhower Request", "Boomhower Request", "Cameron Peak Fire Shapefiles", "CPF_Fire_Perimeter.shp")) %>%
  dplyr::mutate(
    UNIT_AND_NUMBER = "CameronPeakFire_8069_2020", yearperim = 2020, datasource = "FromCounty",
    perimidvar = "LarimerCPF", FireNamePerim = "CameronPeakFire"
  ) %>%
  dplyr::select(UNIT_AND_NUMBER, yearperim, datasource, perimidvar, FireNamePerim) %>%
  st_transform(crs = st_crs(h)) %>%
  st_cast("MULTIPOLYGON")

perims_hp <- st_read(file.path(INPUT_PUBLIC, "DamageData", "NotDINS", "notCA", "Colorado_Larimer", "Boomhower Request", "Boomhower Request", "High Park Fire Shapefiles", "HPF_Fire_Perimeter.shp")) %>%
  dplyr::mutate(
    UNIT_AND_NUMBER = "HighparkFire_8069_2012", yearperim = 2012, datasource = "FromCounty",
    perimidvar = "LarimerHP", FireNamePerim = "Highpark"
  ) %>%
  dplyr::select(UNIT_AND_NUMBER, yearperim, datasource, perimidvar, FireNamePerim) %>%
  st_transform(crs = st_crs(h)) %>%
  st_cast("MULTIPOLYGON")

perimvars <- c("UNIT_AND_NUMBER", "perimidvar", "datasource", "yearperim", "FireNamePerim")
perims_notCA <- mtbs_merged[, perimvars] %>%
  rbind(nifc_old_merged[, perimvars]) %>%
  rbind(nifc_current_merged[, perimvars]) %>%
  rbind(perims_cpf[, perimvars]) %>%
  rbind(perims_hp[, perimvars])

## Compare all perimeter data sources for each incident
for (k in unique(perims_notCA$UNIT_AND_NUMBER)) {
  p <- ggplot(data = perims_notCA[perims_notCA$UNIT_AND_NUMBER == k, ]) +
    geom_sf(aes(color = as.factor(datasource)), fill = NA) +
    geom_sf(data = h[h$UNIT_AND_NUMBER == k, ]) +
    theme_map()
  save_plot(file.path(RES, "incident_maps", "perimeters", paste0(k, ".png")), plot = p)
}

## Compile final perimeter list - choose which source to keep for each incident based on hierarchy of source quality.
perims_notCA$datasource <- factor(perims_notCA$datasource,
  levels = c("MTBS", "FromCounty", "NIFC_old", "NIFC_current"),
  ordered = TRUE
)
perims_notCA <- perims_notCA %>%
  dplyr::arrange(UNIT_AND_NUMBER, datasource) %>%
  dplyr::group_by(UNIT_AND_NUMBER) %>%
  dplyr::mutate(bestavailablesource = min(datasource)) %>%
  dplyr::ungroup() %>%
  dplyr::filter(datasource == bestavailablesource) %>%
  dplyr::select(-bestavailablesource, -perimidvar) %>%
  rename(Shape = geometry) # https://github.com/r-spatial/sf/issues/719 deserves eventual follow-up with st_set_geometry, etc as described at that link

# COMBINE
perims <- rbind(perims, perims_notCA)

## Which fires don't merge? -- 2020 fires, plus actual failed merges for pre-2020
setdiff(fire_ids$UNIT_AND_NUMBER, perims$UNIT_AND_NUMBER)
failed <- which(fire_ids$UNIT_AND_NUMBER %in% setdiff(fire_ids$UNIT_AND_NUMBER, perims$UNIT_AND_NUMBER))
fire_ids[failed, ][order(StartDate)]

## -- Map all perimeters in all states
county_boundary <- st_read(file.path(INPUT_PUBLIC, "CensusTiger", "tl_2019_us_county", "tl_2019_us_county.shp")) %>%
  dplyr::filter(STATEFP %in% c("04", "06", "08", "16", "30", "32", "35", "41", "49", "53", "56")) %>%
  st_transform(crs = st_crs(perims))

p <- ggplot(data = perims) +
  geom_sf(aes(color = as.factor(yearperim)), alpha = 0.75, fill = NA) +
  geom_sf(data = county_boundary, color = "gray", fill = NA) +
  scale_color_discrete() +
  theme_map()
save_plot(file.path(RES, "allperims_allstates.png"), plot = p)

rm(
  p, perims_cpf, perims_hp, perims_mtbs, perims_nifc_old, perims_nifc_current,
  h, county_boundary, mtbs_merged, nifc_current_merged, nifc_old_merged,
  ca_fire_ids, otherstate_fire_ids, perims_notCA
)

## -- Save an sf object with all relevant perimeters
qsave(perims, file.path(WORKING, "cleanedperimeters.qs"))

###############################################################################################
###### STEP 3: Identify homes that sit inside of each perimeter ###############################
###############################################################################################

# Load perimeters
perims <- qread(file.path(WORKING, "cleanedperimeters.qs"))

homes_sf <- zillowdata %>%
  dplyr::filter(!is.na(lon) & !is.na(lat)) %>%
  st_as_sf(
    coords = c("lon", "lat"),
    # agr = "constant",
    # crs = "+proj=longlat +datum=WGS84",
    crs = 4326,
    stringsAsFactors = FALSE,
    remove = TRUE
  ) %>%
  st_transform(crs = st_crs(perims)) %>%
  mutate(temphomeid = seq(1:n()))

## Function to identify homes in ONE GIVEN incident perimeter, given some buffer
join_homes_to_specific_perim <- function(buffer_in_meters, thisperim) {
  print(thisperim)
  perimrows <- perims %>%
    dplyr::filter(UNIT_AND_NUMBER == thisperim) %>%
    mutate(UNIT_AND_NUMBER = NULL)
  homes_in_perims <- homes_sf %>%
    dplyr::filter(UNIT_AND_NUMBER == thisperim)
  checkdupes <- homes_in_perims %>%
    group_by(ImportParcelID, BuildingOrImprovementNumber) %>%
    dplyr::filter(row_number() == 1)
  stopifnot(nrow(checkdupes) == nrow(homes_in_perims)) # confirm no dupes BEFORE the spatial merge
  rm(checkdupes)

  # Compute distance to perimeter edge
  perim_line <- perimrows %>% st_cast(to = "MULTILINESTRING")

  # dist_to_perim will be negative if the home is within perimeter and positive is with
  dists <- ifelse(st_intersects(homes_in_perims, perimrows, sparse = F), -1, 1) * as.vector(st_distance(perim_line, homes_in_perims))

  # If this is a matrix (in the case of multiple perimeters for this fire), take the minimum distance for each row
  if (is.matrix(dists)) {
    dists <- apply(dists, 1, min)
  }

  homes_in_perims$dist_to_perim <- dists

  # stopifnot(nrow(dplyr::distinct(st_drop_geometry(homes_in_perims),ImportParcelID,BuildingOrImprovementNumber))==nrow(homes_in_perims)) #confirm no dupes BEFORE the spatial merge
  homes_in_perims <- homes_in_perims %>%
    st_join(st_buffer(perimrows, buffer_in_meters), left = FALSE) %>% # The "left=FALSE" option does an inner join, dropping homes not in the perimeters
    group_by(ImportParcelID, BuildingOrImprovementNumber) %>%
    dplyr::filter(row_number() == 1) %>% # Sometimes perimeters are composed of 2 or more shapes, and if buffers are large enough can get dupe results from st_join.
    ungroup() %>%
    # dplyr::distinct(ImportParcelID,BuildingOrImprovementNumber,.keep_all=TRUE) %>% #Sometimes perimeters are composed of 2 or more shapes, and if buffers are large enough can get dupe results from st_join.
    dplyr::select(
      temphomeid, destroyed, FIPS, ImportParcelID,
      BuildingOrImprovementNumber, YearBuilt, single_family, UNIT_AND_NUMBER,
      PropertyStreetPreDirectional, PropertyStreetName, PropertyStreetSuffix,
      PropertyStreetPostDirectional, PropertyZip, PropertyHouseNumber, Side,
      street, street100, street25, State, dist_to_perim
    )
  return(homes_in_perims)
}

## Function to call previous function twice, identifying homes INSIDE the perimeter and homes OUTSIDE but in the buffer
select_homes_by_inc <- function(buffer, firelist) {
  # firelist = incids[1]
  # buffer = 10


  insidehomes <- join_homes_to_specific_perim(0, firelist)
  bufferhomes <- join_homes_to_specific_perim(buffer, firelist) %>%
    dplyr::filter(!(ImportParcelID %in% insidehomes$ImportParcelID))
  allhomes <- rbind(insidehomes, bufferhomes) %>%
    mutate(OUTSIDE = if_else(ImportParcelID %in% bufferhomes$ImportParcelID, TRUE, FALSE))

  # stopifnot(nrow(distinct(allhomes,ImportParcelID,BuildingOrImprovementNumber))==nrow(allhomes))

  checkdupes <- allhomes %>%
    group_by(ImportParcelID, BuildingOrImprovementNumber) %>%
    dplyr::filter(row_number() == 1)
  stopifnot(nrow(checkdupes) == nrow(allhomes))
  rm(checkdupes)
  return(allhomes)
}

#########################################################
######## CREATE MAIN ANALYSIS DATASET ###################
#########################################################

### Identify homes in main analysis dataset: Homes within perimeter (plus buffer) for each incident, stacked incidents.

## This can only include incidents where we can identify a matching fire perimeter.
incids <- unique(zillowdata$UNIT_AND_NUMBER) %>%
  base::intersect(unique(perims$UNIT_AND_NUMBER))

# Exclude a fire with 0 homes inside the perimeter and 0 inside buffer.
fires_to_ignore <- c("CAVNC 090993")
incids <- incids[which(!incids %in% fires_to_ignore)]

### Apply the perimeter merge function to each fire incident and combine them.
inclist <- lapply(incids, select_homes_by_inc, buffer = 10)
stacked <- do.call(rbind, inclist)

# A handful of dist_to_perim values are >10, these are destroyed homes
stacked %>%
  pull(dist_to_perim) %>%
  summary()


# make an object to use when calculating neighbors below.
neighbors <- stacked[, c(
  "ImportParcelID", "BuildingOrImprovementNumber", "UNIT_AND_NUMBER", "OUTSIDE",
  "PropertyStreetPreDirectional", "PropertyStreetName", "PropertyStreetSuffix",
  "PropertyStreetPostDirectional", "PropertyZip", "PropertyHouseNumber", "Side",
  "street", "street100", "street25", "State", "dist_to_perim"
)]

# Save a version of the data before dropping spatial info -- to use in scripts that identify spatial variables and neighbors
qsave(
  neighbors[, c("ImportParcelID", "BuildingOrImprovementNumber", "UNIT_AND_NUMBER", "State")],
  file.path(WORKING, "homepoints.qs")
)

# Merge back to complete data table to get other attributes
stacked <- stacked %>% st_transform(stacked, crs = "+proj=longlat +datum=WGS84") # Convert back from the projected CRS
stacked[, c("lon", "lat")] <- st_coordinates(stacked)

stacked <- stacked %>%
  st_drop_geometry() %>%
  data.table() %>%
  rename(incidentid = UNIT_AND_NUMBER) %>%
  dplyr::select(ImportParcelID, BuildingOrImprovementNumber, incidentid, OUTSIDE, Side, dist_to_perim)

#### Confirm all rows of processed data are also in main data
FullRegDataDuplicatedControls <- readRDS(file.path(WORKING, "FullRegDataDuplicatedControls.RDS"))
a <- fsetdiff(
  stacked[, .(ImportParcelID, BuildingOrImprovementNumber, incidentid)],
  FullRegDataDuplicatedControls[, .(ImportParcelID, BuildingOrImprovementNumber, incidentid)]
)
stopifnot(nrow(a) == 0)
rm(a)
stacked <- merge(stacked, FullRegDataDuplicatedControls, by = c("ImportParcelID", "BuildingOrImprovementNumber", "incidentid"))

#- confirm unique ids in the merged dataset
anyDuplicated(stacked[, c("ImportParcelID", "BuildingOrImprovementNumber", "incidentid")])



# Save dataset
qsave(stacked, file.path(WORKING, "maindata.qs"))

# DEBUG

######################################################################################################
##### Data check: How often are homes far outside the perimeter destroyed? This looks to be rare. ####
######################################################################################################
#-- Uncomment to produce this validity check -- about 99% of destroyed homes are within 10m.
## Note: this tracks destroyed homes only.

outsideperims <- data.frame(
  incidentid = incids,
  inside = NA_integer_,
  within10 = NA_integer_,
  beyond10 = NA_integer_
)
for (j in incids) {
  a <- select_homes_by_inc(0, j) %>%
    st_drop_geometry() %>%
    dplyr::filter(destroyed == TRUE) %>%
    summarize(number = n())
  b <- select_homes_by_inc(10, j) %>%
    st_drop_geometry() %>%
    dplyr::filter(destroyed == TRUE) %>%
    summarize(number = n()) %>%
    mutate(number = number - a$number)
  c <- select_homes_by_inc(1000, j) %>%
    st_drop_geometry() %>%
    dplyr::filter(destroyed == TRUE) %>%
    summarize(number = n()) %>%
    mutate(number = number - (b$number + a$number))
  outsideperims <- outsideperims %>%
    mutate(inside = if_else(incidentid == j, a$number, inside)) %>%
    mutate(within10 = if_else(incidentid == j, b$number, within10)) %>%
    mutate(beyond10 = if_else(incidentid == j, c$number, beyond10))
  rm(a, b, c)
}
outsidetotals <- outsideperims %>%
  summarize(
    inside = sum(inside),
    within10 = sum(within10),
    beyond10 = sum(beyond10)
  ) %>%
  mutate(sharebeyond10m = beyond10 / (inside + within10))
outsidetotals

dir.create(file.path(RES, "validitychecks"), showWarnings = FALSE)
write.csv(outsidetotals, file.path(RES, "validitychecks", "destroyed_outside_10m_buffer.csv"))



#########################################################################
################### -- Plots of Destroyed vs Not by incident ############
#########################################################################

buffer <- 10


plot_destroyed_vs_not <- function(f) {
  plothomes <- select_homes_by_inc(buffer, f)
  p <- ggplot(perims %>% dplyr::filter(UNIT_AND_NUMBER == f)) +
    geom_sf() +
    geom_sf(data = st_buffer(perims[perims$UNIT_AND_NUMBER == f, ], buffer), color = "red", fill = NA) +
    geom_sf(
      data = plothomes,
      mapping = aes(color = destroyed, shape = OUTSIDE), alpha = 0.5, show.legend = "point"
    ) +
    scale_colour_brewer(palette = "Set2") +
    guides(colour = guide_legend(override.aes = list(alpha = 1))) +
    theme_map()
  dir.create(file.path(RES, "incident_maps", "geocoded", "destroyed"), showWarnings = FALSE)
  save_plot(file.path(RES, "incident_maps", "geocoded", "destroyed", paste0(f, ".png")), plot = p)
}

lapply(incids, plot_destroyed_vs_not)
rm(buffer)


#########################################
##### PRETTY MAP ALL BIG FIRES  ####
#########################################

cities <- read.csv(file.path(INPUT_PUBLIC, "simplemaps_uscities_basicv1.72", "uscities.csv"))
cities <- st_as_sf(cities,
  coords = c("lng", "lat"),
  crs = "+proj=longlat +datum=WGS84",
  stringsAsFactors = FALSE,
  remove = FALSE
)

bigincids <- fire_ids$UNIT_AND_NUMBER[fire_ids$DestroyedHomes >= 100]

pretty_map_fires <- function(i) {
  print(i)

  ### Get homes inside perimeter and re-project to lat/long
  maphomes <- select_homes_by_inc(buffer = 10, firelist = i)
  levels(maphomes$destroyed) <- c("Survived", "Destroyed")
  maphomes <- st_transform(maphomes, "+proj=longlat +datum=WGS84")

  ### Get relevant perimeter shapes and reproject to lat/long
  ## Also apply a buffer to the perimeter to get a bounding box around buffered shape
  plotperim <- dplyr::filter(perims, UNIT_AND_NUMBER == i)
  bufferedperim <- st_buffer(plotperim, 2000)
  plotperim <- st_transform(plotperim, crs = "+proj=longlat +datum=WGS84")
  bufferedperim <- st_transform(bufferedperim, crs = "+proj=longlat +datum=WGS84")
  box <- unname(st_bbox(bufferedperim))

  ### Get up to 6 largest cities by population
  plotcities <- cities[
    cities$lng >= box[1] & cities$lng <= box[3] &
      cities$lat >= box[2] & cities$lat <= box[4],
    c("city", "population")
  ]
  plotcities <- plotcities[order(plotcities$population, decreasing = TRUE), ]
  maxlabels <- 6
  if (nrow(plotcities) > maxlabels) {
    plotcities$poprank <- seq(1, nrow(plotcities))
    plotcities <- plotcities[plotcities$poprank <= maxlabels, ]
  }

  ### Query OSM for street data
  big_streets <- c(box[1], box[2], box[3], box[4]) %>%
    opq() %>%
    add_osm_feature(
      key = "highway",
      value = c("motorway", "primary", "motorway_link", "primary_link")
    ) %>%
    osmdata_sf()

  med_streets <- c(box[1], box[2], box[3], box[4]) %>%
    opq() %>%
    add_osm_feature(
      key = "highway",
      value = c("secondary", "tertiary", "secondary_link", "tertiary_link")
    ) %>%
    osmdata_sf()


  small_streets <- c(box[1], box[2], box[3], box[4]) %>%
    opq() %>%
    add_osm_feature(
      key = "highway",
      value = c(
        "residential", "living_street",
        "unclassified",
        "service", "footway"
      )
    ) %>%
    osmdata_sf()

  names <- c(box[1], box[2], box[3], box[4]) %>%
    opq() %>%
    add_osm_feature(
      key = "place",
      value = c(
        "city", "locality",
        "municipality",
        "town", "village"
      )
    ) %>%
    osmdata_sf()

  # nature <- c(box[1],box[2],box[3],box[4])%>%
  #   opq()%>%
  #   add_osm_feature(key = "natural",
  #                   value = c("coastline", "peak",
  #                             "ridge",
  #                             "water"
  #                   )) %>%
  #   osmdata_sf()

  ## About this error message in plot below:
  # https://ggplot2.tidyverse.org/reference/stat_sf_coordinates.html
  # "A function that takes a sfc object and returns a sfc_POINT with the same length as the input.
  # If NULL, function(x) sf::st_point_on_surface(sf::st_zm(x)) will be used. Note that the function
  # may warn about the incorrectness of the result if the data is not projected, but you can ignore
  # this except when you really care about the exact locations.

  ### Build the map, going step by step.
  p <- ggplot(data = plotperim) +
    ### Homes layer
    geom_sf(
      data = maphomes,
      mapping = aes(colour = destroyed), alpha = 1,
      size = 0.2, shape = 3, show.legend = "point"
    ) +
    ### Fire perimeter layer
    geom_sf(data = plotperim, color = "red", fill = NA, alpha = .7)

  ## Add street map layers from OSM, only using layers that return non-zero rows (or else this breaks).
  if (!is.null(nrow(small_streets$osm_lines))) {
    p <- p + geom_sf(
      data = small_streets$osm_lines,
      inherit.aes = FALSE,
      color = "#666666",
      size = .2,
      alpha = .3
    )
  }
  if (!is.null(nrow(med_streets$osm_lines))) {
    p <- p + geom_sf(
      data = med_streets$osm_lines,
      inherit.aes = FALSE,
      color = "black",
      size = .3,
      alpha = .5
    )
  }
  if (!is.null(nrow(big_streets$osm_lines))) {
    p <- p + geom_sf(
      data = big_streets$osm_lines,
      inherit.aes = FALSE,
      color = "black",
      size = .5,
      alpha = .6
    )
  }
  ## Add city names
  p <- p + geom_sf_label(
    data = plotcities,
    aes(label = city),
    size = 2,
    label.padding = unit(0.1, "lines"),
    label.size = NA,
    fill = alpha(c("white"), 0.5)
  )
  ## Limit map to bounding box of buffered perimeter
  p <- p + coord_sf(
    xlim = c(box[1], box[3]),
    ylim = c(box[2], box[4]),
    expand = FALSE
  )
  ## Plot options
  p <- p +
    scale_color_viridis_d(option = "viridis", begin = 0.1, end = 0.75, direction = -1) +
    # scale_colour_brewer(palette='Set1') +
    guides(colour = guide_legend(
      override.aes = list(alpha = 1, size = 2),
      title = "Homes in Perimeter",
      title.position = "top",
      title.hjust = 0.5,
      direction = "horizontal",
      reverse = TRUE, # List Destroyed first (1), then Survived (0)
      title.theme = element_text(size = 7),
      label.theme = element_text(size = 7)
    )) +
    theme_map() +
    theme(
      legend.position = "bottom",
      legend.justification = "center",
      legend.margin = margin(1, 1, 1, 1, unit = "mm"),
      legend.box.spacing = unit(1, "mm"),
      legend.box.background = element_rect()
    )

  ## Save image file
  dir.create(file.path(RES, "detail_maps", "withhomes"), showWarnings = FALSE) # Create directory if it doesn't exist
  save_plot(file.path(RES, "detail_maps", "withhomes", paste0(i, ".png")), plot = p)

  rm(p, plotperim, bufferedperim, maphomes, box, plotcities, big_streets, med_streets, small_streets, names)
}

lapply(bigincids, pretty_map_fires)
